1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.solr;
19  
20  import org.apache.solr.common.params.ModifiableSolrParams;
21  import org.noggit.JSONUtil;
22  import org.noggit.ObjectBuilder;
23  import org.apache.solr.request.SolrQueryRequest;
24  import org.junit.BeforeClass;
25  import org.junit.Test;
26  import org.slf4j.Logger;
27  import org.slf4j.LoggerFactory;
28  
29  import java.lang.invoke.MethodHandles;
30  import java.util.ArrayList;
31  import java.util.Collection;
32  import java.util.Collections;
33  import java.util.HashMap;
34  import java.util.HashSet;
35  import java.util.LinkedHashMap;
36  import java.util.List;
37  import java.util.Map;
38  import java.util.Set;
39  
40  public class TestJoin extends SolrTestCaseJ4 {
41  
42    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
43  
44    @BeforeClass
45    public static void beforeTests() throws Exception {
46      System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
47      initCore("solrconfig.xml","schema12.xml");
48    }
49  
50  
51    @Test
52    public void testJoin() throws Exception {
53      assertU(add(doc("id", "1","name", "john", "title", "Director", "dept_s","Engineering")));
54      assertU(add(doc("id", "2","name", "mark", "title", "VP", "dept_s","Marketing")));
55      assertU(add(doc("id", "3","name", "nancy", "title", "MTS", "dept_s","Sales")));
56      assertU(add(doc("id", "4","name", "dave", "title", "MTS", "dept_s","Support", "dept_s","Engineering")));
57      assertU(add(doc("id", "5","name", "tina", "title", "VP", "dept_s","Engineering")));
58  
59      assertU(add(doc("id","10", "dept_id_s", "Engineering", "text","These guys develop stuff")));
60      assertU(add(doc("id","11", "dept_id_s", "Marketing", "text","These guys make you look good")));
61      assertU(add(doc("id","12", "dept_id_s", "Sales", "text","These guys sell stuff")));
62      assertU(add(doc("id","13", "dept_id_s", "Support", "text","These guys help customers")));
63  
64      assertU(commit());
65  
66      ModifiableSolrParams p = params("sort","id asc");
67  
68      // test debugging
69      assertJQ(req(p, "q","{!join from=dept_s to=dept_id_s}title:MTS", "fl","id", "debugQuery","true")
70          ,"/debug/join/{!join from=dept_s to=dept_id_s}title:MTS=={'_MATCH_':'fromSetSize,toSetSize', 'fromSetSize':2, 'toSetSize':3}"
71      );
72  
73      assertJQ(req(p, "q","{!join from=dept_s to=dept_id_s}title:MTS", "fl","id")
74          ,"/response=={'numFound':3,'start':0,'docs':[{'id':'10'},{'id':'12'},{'id':'13'}]}"
75      );
76  
77      // empty from
78      assertJQ(req(p, "q","{!join from=noexist_s to=dept_id_s}*:*", "fl","id")
79          ,"/response=={'numFound':0,'start':0,'docs':[]}"
80      );
81  
82      // empty to
83      assertJQ(req(p, "q","{!join from=dept_s to=noexist_s}*:*", "fl","id")
84          ,"/response=={'numFound':0,'start':0,'docs':[]}"
85      );
86  
87      // self join... return everyone with she same title as Dave
88      assertJQ(req(p, "q","{!join from=title to=title}name:dave", "fl","id")
89          ,"/response=={'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'4'}]}"
90      );
91  
92      // find people that develop stuff
93      assertJQ(req(p, "q","{!join from=dept_id_s to=dept_s}text:develop", "fl","id")
94          ,"/response=={'numFound':3,'start':0,'docs':[{'id':'1'},{'id':'4'},{'id':'5'}]}"
95      );
96  
97      // self join on multivalued text field
98      assertJQ(req(p, "q","{!join from=title to=title}name:dave", "fl","id")
99          ,"/response=={'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'4'}]}"
100     );
101 
102     assertJQ(req(p, "q","{!join from=dept_s to=dept_id_s}title:MTS", "fl","id", "debugQuery","true")
103         ,"/response=={'numFound':3,'start':0,'docs':[{'id':'10'},{'id':'12'},{'id':'13'}]}"
104     );
105     
106     // expected outcome for a sub query matching dave joined against departments
107     final String davesDepartments = 
108       "/response=={'numFound':2,'start':0,'docs':[{'id':'10'},{'id':'13'}]}";
109 
110     // straight forward query
111     assertJQ(req(p, "q","{!join from=dept_s to=dept_id_s}name:dave",
112                  "fl","id"),
113              davesDepartments);
114 
115     // variable deref for sub-query parsing
116     assertJQ(req(p, "q","{!join from=dept_s to=dept_id_s v=$qq}",
117                  "qq","{!dismax}dave",
118                  "qf","name",
119                  "fl","id", 
120                  "debugQuery","true"),
121              davesDepartments);
122 
123     // variable deref for sub-query parsing w/localparams
124     assertJQ(req(p, "q","{!join from=dept_s to=dept_id_s v=$qq}",
125                  "qq","{!dismax qf=name}dave",
126                  "fl","id", 
127                  "debugQuery","true"),
128              davesDepartments);
129 
130     // defType local param to control sub-query parsing
131     assertJQ(req(p, "q","{!join from=dept_s to=dept_id_s defType=dismax}dave",
132                  "qf","name",
133                  "fl","id", 
134                  "debugQuery","true"),
135              davesDepartments);
136 
137     // find people that develop stuff - but limit via filter query to a name of "john"
138     // this tests filters being pushed down to queries (SOLR-3062)
139     assertJQ(req(p, "q","{!join from=dept_id_s to=dept_s}text:develop", "fl","id", "fq", "name:john")
140              ,"/response=={'numFound':1,'start':0,'docs':[{'id':'1'}]}"
141             );
142 
143   }
144 
145 
146   @Test
147   public void testRandomJoin() throws Exception {
148     int indexIter=50 * RANDOM_MULTIPLIER;
149     int queryIter=50 * RANDOM_MULTIPLIER;
150 
151     // groups of fields that have any chance of matching... used to
152     // increase test effectiveness by avoiding 0 resultsets much of the time.
153     String[][] compat = new String[][] {
154         {"small_s","small2_s","small2_ss","small3_ss"},
155         {"small_i","small2_i","small2_is","small3_is"}
156     };
157 
158 
159     while (--indexIter >= 0) {
160       int indexSize = random().nextInt(20 * RANDOM_MULTIPLIER);
161 
162       List<FldType> types = new ArrayList<>();
163       types.add(new FldType("id",ONE_ONE, new SVal('A','Z',4,4)));
164       types.add(new FldType("score_f",ONE_ONE, new FVal(1,100)));  // field used to score
165       types.add(new FldType("small_s",ZERO_ONE, new SVal('a',(char)('c'+indexSize/3),1,1)));
166       types.add(new FldType("small2_s",ZERO_ONE, new SVal('a',(char)('c'+indexSize/3),1,1)));
167       types.add(new FldType("small2_ss",ZERO_TWO, new SVal('a',(char)('c'+indexSize/3),1,1)));
168       types.add(new FldType("small3_ss",new IRange(0,25), new SVal('A','z',1,1)));
169       types.add(new FldType("small_i",ZERO_ONE, new IRange(0,5+indexSize/3)));
170       types.add(new FldType("small2_i",ZERO_ONE, new IRange(0,5+indexSize/3)));
171       types.add(new FldType("small2_is",ZERO_TWO, new IRange(0,5+indexSize/3)));
172       types.add(new FldType("small3_is",new IRange(0,25), new IRange(0,100)));
173 
174       clearIndex();
175       Map<Comparable, Doc> model = indexDocs(types, null, indexSize);
176       Map<String, Map<Comparable, Set<Comparable>>> pivots = new HashMap<>();
177 
178       for (int qiter=0; qiter<queryIter; qiter++) {
179         String fromField;
180         String toField;
181         if (random().nextInt(100) < 5) {
182           // pick random fields 5% of the time
183           fromField = types.get(random().nextInt(types.size())).fname;
184           // pick the same field 50% of the time we pick a random field (since other fields won't match anything)
185           toField = (random().nextInt(100) < 50) ? fromField : types.get(random().nextInt(types.size())).fname;
186         } else {
187           // otherwise, pick compatible fields that have a chance of matching indexed tokens
188           String[] group = compat[random().nextInt(compat.length)];
189           fromField = group[random().nextInt(group.length)];
190           toField = group[random().nextInt(group.length)];
191         }
192 
193         Map<Comparable, Set<Comparable>> pivot = pivots.get(fromField+"/"+toField);
194         if (pivot == null) {
195           pivot = createJoinMap(model, fromField, toField);
196           pivots.put(fromField+"/"+toField, pivot);
197         }
198 
199         Collection<Doc> fromDocs = model.values();
200         Set<Comparable> docs = join(fromDocs, pivot);
201         List<Doc> docList = new ArrayList<>(docs.size());
202         for (Comparable id : docs) docList.add(model.get(id));
203         Collections.sort(docList, createComparator("_docid_",true,false,false,false));
204         List sortedDocs = new ArrayList();
205         for (Doc doc : docList) {
206           if (sortedDocs.size() >= 10) break;
207           sortedDocs.add(doc.toObject(h.getCore().getLatestSchema()));
208         }
209 
210         Map<String,Object> resultSet = new LinkedHashMap<>();
211         resultSet.put("numFound", docList.size());
212         resultSet.put("start", 0);
213         resultSet.put("docs", sortedDocs);
214 
215         // todo: use different join queries for better coverage
216 
217         SolrQueryRequest req = req("wt","json","indent","true", "echoParams","all",
218             "q","{!join from="+fromField+" to="+toField
219                 + (random().nextInt(4)==0 ? " fromIndex=collection1" : "")
220                 +"}*:*"
221         );
222 
223         String strResponse = h.query(req);
224 
225         Object realResponse = ObjectBuilder.fromJSON(strResponse);
226         String err = JSONTestUtil.matchObj("/response", realResponse, resultSet);
227         if (err != null) {
228           log.error("JOIN MISMATCH: " + err
229            + "\n\trequest="+req
230            + "\n\tresult="+strResponse
231            + "\n\texpected="+ JSONUtil.toJSON(resultSet)
232            + "\n\tmodel="+ model
233           );
234 
235           // re-execute the request... good for putting a breakpoint here for debugging
236           String rsp = h.query(req);
237 
238           fail(err);
239         }
240 
241       }
242     }
243   }
244 
245 
246   Map<Comparable, Set<Comparable>> createJoinMap(Map<Comparable, Doc> model, String fromField, String toField) {
247     Map<Comparable, Set<Comparable>> id_to_id = new HashMap<>();
248 
249     Map<Comparable, List<Comparable>> value_to_id = invertField(model, toField);
250 
251     for (Comparable fromId : model.keySet()) {
252       Doc doc = model.get(fromId);
253       List<Comparable> vals = doc.getValues(fromField);
254       if (vals == null) continue;
255       for (Comparable val : vals) {
256         List<Comparable> toIds = value_to_id.get(val);
257         if (toIds == null) continue;
258         Set<Comparable> ids = id_to_id.get(fromId);
259         if (ids == null) {
260           ids = new HashSet<>();
261           id_to_id.put(fromId, ids);
262         }
263         for (Comparable toId : toIds)
264           ids.add(toId);
265       }
266     }
267 
268     return id_to_id;
269   }
270 
271 
272   Set<Comparable> join(Collection<Doc> input, Map<Comparable, Set<Comparable>> joinMap) {
273     Set<Comparable> ids = new HashSet<>();
274     for (Doc doc : input) {
275       Collection<Comparable> output = joinMap.get(doc.id);
276       if (output == null) continue;
277       ids.addAll(output);
278     }
279     return ids;
280   }
281 
282 }